# Implement the rescaled change time method on single-day data
# For AAPL 2019-01-02; settings are defined below (currently config = "nosize",
# support_max = 20, delta = 0.125)

library(data.table)
library(tidyr)
library(xts)
library(dplyr)
library(pbmcapply)
# Restore the saved workspace for this ticker/day. The script later uses
# `book`, `event` and `data_file_name` without defining them, so they are
# presumably provided by this file (together with the helper objects that are
# rm()'d further down) -- TODO confirm against the .RData contents.
load("~/Desktop/orderbook/Research/R/gof/AAPL_2019-01-02.RData")

# Largest lag and grid step that are added to each event time further down.
support_max <- 20
delta <- 0.125

# Run configuration. "nosize" leaves the file-name tag empty; any other value
# (e.g. config <- "") tags the output folder name with "eventsize_".
config <- "nosize"
event_size <- if (config == "nosize") "" else "eventsize_"

# Load Hawkes estimation data
# hawkes = readRDS(paste0("~/Desktop/Research/R/gof/images/hawkes_markov_pref_price_", event_size,
#                         data_file_name, paste0("_support",toString(support_max),"delta", toString(delta),".rds") ))
# 


#delta = hawkes$delta
#support_max = hawkes$support_max
# Lag grid 0, delta, ..., support_max.
support_seq <- seq(0, support_max, delta)

# Keep only the rows whose modifying_queue is one of -3..3.
book <- book[modifying_queue %in% -3:3]

# Columns that are left out of the numeric state matrix.
char_cols <- c("time", "note", "ref_price_event", "queue_and_event",
               "event_type_coarsen")

# Time-of-day bucket edges: 60-wide bins on [34200, 36000], 300-wide bins up
# to 55800, then 60-wide bins up to 57600 (presumably seconds after midnight,
# i.e. 09:30-16:00 -- confirm). 127 edges give 126 buckets.
breaks_2 <- c(34200 + seq(0, 1800, 60),
              36000 + seq(300, 19800, 300),
              55800 + seq(60, 1800, 60))

# labels = FALSE makes cut() return the bucket number itself, so the number of
# buckets always follows breaks_2 instead of a hard-coded 1:126, and a single
# `:=` avoids the full-table copy that `book$col <- ...` triggers.
book[, time_customizedtwo := as.numeric(cut(time, breaks = breaks_2,
                                            labels = FALSE,
                                            include.lowest = TRUE,
                                            right = FALSE))]

# Numeric matrix of every remaining (non-character) column.
state <- data.matrix(book[, setdiff(colnames(book), char_cols), with = FALSE])

# Sanity check: the columns of `event` (after the first one) must be exactly
# the 18 queue events followed by the 6 price events, in this order.
event_name <- paste(colnames(event)[-1], collapse = ".")
queue_event_labels <- paste0(rep(sprintf("%+d", c(1:3, -(1:3))), each = 3),
                             "(", c("+", "-", "t"), ")")
# "+1(+).+1(-).+1(t).+2(+). ... .-3(t)"
e1 <- paste(queue_event_labels, collapse = ".")
e2 <- "-3.-2.-1.1.2.3"  # not used below; kept in case other code expects it
e3 <- paste(e1, "p+(+).p+(-).p+(t).p-(+).p-(-).p-(t)", sep = ".")
if (!isTRUE(event_name == e3)) {
  stop("Unexpected event columns.\n  expected: ", e3,
       "\n  found:    ", event_name, call. = FALSE)
}

# Extract the time bucket now; `state` is narrowed to other columns later.
time_customizedtwo <- state[, "time_customizedtwo"]
#price <- state[,"ref_price"]
#predetermined_price_unit <- event_size_by_ticker[ticker == data_ticker, price]
#if(length(predetermined_price_unit) != 1){
#  price_unit <- predetermined_price_unit
#}else{
#  price_unit <- 10000
#}
#price <- floor(price / price_unit)
# One state column per event type: each queue level (+1..+3, then -1..-3)
# supplies its standardized queue size three times, followed by the six
# columns used for the price events.
queue_state_cols <- c(paste0("q_std_plus_", 1:3), paste0("q_std_minus_", 1:3))
state_cols <- c(
  rep(queue_state_cols, each = 3),
  "bid_ask_spread",  # for p+(+)
  "q_std_best_ask",  # for p+(-)
  "q_std_best_ask",  # for p+(t)
  "bid_ask_spread",  # for p-(+)
  "q_std_best_bid",  # for p-(-)
  "q_std_best_bid"   # for p-(t)
)
state <- state[, state_cols]

# Cap every state value at 9.
state[state > 9] <- 9

# One single-column matrix (column name "state") per event column of `event`,
# plus the time bucket as an extra named element.
n_event_cols <- ncol(event) - 1
state_list <- lapply(1:n_event_cols, function(k) cbind(state = state[, k]))
state_list$time_customizedtwo <- time_customizedtwo

# From here on only these three columns of `book` are needed.
keep_cols <- c("time", "queue_and_event", "time_customizedtwo")
book <- book[, keep_cols, with = FALSE]


# Selected event for goodness-of-fit testing is "+2(+)"
#event_names = colnames(event)[-1]
#selected_event = event_names[4]
#book[, paste0("state_",selected_event) :=state_list[[4]][,1]]




# get change time
# Every event time t contributes the grid points t + support_seq
# (t, t + delta, ..., t + support_max). The grid is built with exactly the
# arithmetic the index lookup further down uses (event time + lag vector), so
# the exact floating-point match() there cannot miss a point because of
# seq()'s end-point clamping. Duplicate event times are dropped first; the
# whole computation is one vectorised expression, so no worker processes are
# needed.
event_times <- unique(book$time)
change_time <- rep(event_times, each = length(support_seq)) + support_seq
change_time <- sort(unique(change_time))
change_time <- data.frame("time" = change_time)




# Drop the objects that are not needed for the index computation below.
# rm() only warns about names that do not exist in the workspace.
unused_objects <- c(
  "price", "state", "state_list", "book_lambda", "book_event", "book_msg",
  "book_order", "event", "ref_price_nondup", "average_event_size",
  "estimate_hawkes", "plot_hawkes", "get_theta_cls",
  "get_hawkes_kernel_bigdt", "get_bigdt_list", "get_theta_cls_bigdt_fromfile",
  "plot_hawkes_kernel", "organize_kernel_output", "get_theta_cls_bigdt"
)
rm(list = unused_objects)
gc()

# Unwrap the grid to a plain numeric vector and keep only the first two
# columns of `book`.
change_time <- change_time[["time"]]
book <- book[, 1:2]

# Split `book` into consecutive chunks of n rows, aiming for at most 128 parts.
num_parts <- 128
nr <- nrow(book)
if (nr == 0) {
  stop("`book` has no rows; nothing to index", call. = FALSE)
}
n <- ceiling(nr / num_parts)
book <- split(book, rep(seq_len(ceiling(nr / n)), each = n, length.out = nr))
# Rounding n up can leave fewer chunks than requested (e.g. 10000 rows ->
# n = 79 -> 127 chunks). Record the real count so that the loops over
# 1:num_parts below never index a chunk that does not exist.
num_parts <- length(book)

# Lags added to every event time when looking up its grid indices.
support_vec <- seq(0, support_max, delta)
support_vec_length <- length(support_vec)


# Folder that receives one .rds file per part.
folder_name <- paste0("~/Desktop/orderbook/Research/R/gof/images/event_index_",
                      event_size, data_file_name,
                      "_support", toString(support_max),
                      "delta", toString(delta), "/")
# recursive: create missing parent folders too; showWarnings = FALSE: re-running
# the script with an existing folder is fine.
dir.create(folder_name, recursive = TRUE, showWarnings = FALSE)


# For every event, find the positions in `change_time` of the grid points
# time + support_vec. Work proceeds part by part; each part's result (a list
# with one integer vector per event) is written to "<folder_name>part<j>.rds".
# Iterating over seq_along(book) guarantees that only existing parts are
# visited, even when the split produced fewer chunks than requested.
for (j in seq_along(book)) {
  book_sub <- book[[j]]
  n_sub <- nrow(book_sub)

  # Window of the global grid that can contain this part's grid points.
  # NOTE(review): this assumes the rows of each part are ordered by time, so
  # the first/last rows bound the window -- confirm.
  part_start_index <- match(book_sub$time[1], change_time)
  part_end_index <- match(book_sub$time[n_sub] + support_vec[support_vec_length],
                          change_time)
  if (is.na(part_start_index) || is.na(part_end_index)) {
    stop("Part ", j, ": its first or last grid time is not in change_time",
         call. = FALSE)
  }
  change_time_sub <- change_time[part_start_index:part_end_index]

  # One progress line per part instead of one print per event from every
  # forked worker.
  message("Indexing part ", j, " of ", length(book), " (", n_sub, " events)")

  event_index <- pbmclapply(seq_len(n_sub), function(i) {
    vec <- book_sub$time[i] + support_vec
    # Narrow the lookup table to [first, last] grid point of this event before
    # matching all lags, then shift the positions back to global indices.
    end_index <- match(vec[support_vec_length], change_time_sub)
    start_index <- match(vec[1], change_time_sub[1:end_index])
    match(vec, change_time_sub[start_index:end_index]) +
      start_index - 1 + part_start_index - 1
  }, mc.cores = 8)
  saveRDS(event_index, paste0(folder_name, "part", toString(j), ".rds"))

}

# Read the per-part results back, in part order, and concatenate them into one
# list with one element per event. Only the parts that exist are read
# (`book` is still the list of chunks at this point), and the list is built
# with a single concatenation instead of being grown inside a loop.
part_files <- paste0(folder_name, "part", seq_along(book), ".rds")
all_event_index <- do.call(c, c(list(list()), lapply(part_files, readRDS)))

# NOTE(review): unlike folder_name, this file name does not contain
# `event_size`, so both configurations write to the same file. Left unchanged
# because other scripts may read this exact path -- confirm.
saveRDS(all_event_index,
        paste0("~/Desktop/orderbook/Research/R/gof/images/event_index_",
               data_file_name, "_support", toString(support_max),
               "delta", toString(delta), ".rds"))

#file1 = readRDS(paste0("~/Desktop/orderbook/Research/R/gof/images/event_index_",data_file_name,"_support",toString(support_max),"delta", toString(delta),"_", event_size ,"_new.rds"   ) )
#file2 = readRDS("~/Desktop/orderbook/Research/R/gof/images/event_index_AAPL_2019-01-02_support20delta0.5_eventsize.rds")
# for (j in 1:num_parts){
#   book_sub = book[[j]]
# 
#   event_index = parallel::mclapply( 1:dim(book_sub)[1], function(i){
#     print (i)
#     vec = book_sub$time[i]+support_vec
#     end_index = match(vec[support_vec_length], change_time )
#     start_index = match(vec[1], change_time[1:end_index] )
#     match(vec, change_time[start_index:end_index]        ) + start_index -1
# 
#   }, mc.cores = 5     )
# 
# }




# start = Sys.time()
# print ("Start obtaining event index!")
# event_index = parallel::mclapply( 1:dim(book)[1], function(i){
#   print (i)
#   vec = book$time[i]+support_vec
#   end_index = match(vec[support_vec_length], change_time )
#   start_index = match(vec[1], change_time[1:end_index] )
#   match(vec, change_time[start_index:end_index]        ) + start_index -1
#   
# }, mc.cores = 5     )
# Sys.time() - start

#save.image( paste0("~/Desktop/Research/R/gof/images/event_index_",data_file_name,"_support",toString(support_max),"delta", toString(delta),"_", event_size ,".RData"   ) )